{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "50ba299e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All libraries imported successfully\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from glob import glob\n", "import os\n", "import librosa\n", "import librosa.display\n", "import IPython.display as ipd\n", "from itertools import cycle\n", "import time\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", "\n", "print(\"All libraries imported successfully\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "ab101339", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading all musical notes from: /Users/Parag/Documents/TMRT_data\n", "A: 2 file(s) found\n", "A#: 2 file(s) found\n", "B: 2 file(s) found\n", "C: 2 file(s) found\n", "C#: 2 file(s) found\n", "D: 2 file(s) found\n", "D#: 2 file(s) found\n", "E: 2 file(s) found\n", "F: 2 file(s) found\n", "F#: 2 file(s) found\n", "G: 2 file(s) found\n", "G#: 2 file(s) found\n", "\n", "Total notes with files: 12\n", "Total audio files: 24\n" ] } ], "source": [ "note_list = [\"A\", \"A#\", \"B\", \"C\", 
\"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\"]\n", "base_path = \"/Users/Parag/Documents/TMRT_data\"\n", "\n", "print(\"Loading all musical notes from:\", base_path)\n", "\n", "audio_files = {}\n", "total_files = 0\n", "\n", "for note in note_list:\n", " patterns = [\n", " f\"{base_path}/note{note}.mp3\",\n", " f\"{base_path}/Note{note}.mp3\", \n", " f\"{base_path}/{note}.mp3\",\n", " f\"{base_path}/{note.lower()}.mp3\"\n", " ]\n", " \n", " files_found = []\n", " for pattern in patterns:\n", " files_found.extend(glob(pattern))\n", " \n", " if files_found:\n", " audio_files[note] = files_found\n", " total_files += len(files_found)\n", " print(f\"{note}: {len(files_found)} file(s) found\")\n", " else:\n", " print(f\"{note}: No files found\")\n", "\n", "print(f\"\\nTotal notes with files: {len(audio_files)}\")\n", "print(f\"Total audio files: {total_files}\")\n", "\n", "if len(audio_files) == 0:\n", " print(\"ERROR: No audio files found!\")\n", " print(\"Check path exists:\", base_path)\n", " print(\"Files should be named like: noteA.mp3, noteB.mp3, etc.\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "9ee84bdb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading complete audio data for all notes\n", "Loading Note A...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note A:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note A#...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note A#:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note B...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note B:\n" ] }, { "data": { 
"text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note C...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note C:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note C#...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note C#:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note D...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note D:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note D#...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note D#:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note E...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note E:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note F...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note F:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note F#...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 
22050 Hz\n", " Playing Note F#:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note G...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note G:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Loading Note G#...\n", " Successfully loaded - Duration: 20.13s, Sample rate: 22050 Hz\n", " Playing Note G#:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Successfully loaded: 12 notes\n", "Consistent sample rate: 22050 Hz\n" ] } ], "source": [ "print(\"Loading complete audio data for all notes\")\n", "\n", "audio_data = {}\n", "sample_rates = {}\n", "\n", "for note in audio_files.keys():\n", " print(f\"Loading Note {note}...\")\n", " \n", " try:\n", " file_path = audio_files[note][0]\n", " y, sr = librosa.load(file_path)\n", " audio_data[note] = y\n", " sample_rates[note] = sr\n", " \n", " duration = len(y) / sr\n", " print(f\" Successfully loaded - Duration: {duration:.2f}s, Sample rate: {sr} Hz\")\n", " \n", " # Play audio sample\n", " print(f\" Playing Note {note}:\")\n", " display(ipd.Audio(y, rate=sr))\n", " \n", " except Exception as e:\n", " print(f\" Error loading {note}: {e}\")\n", " continue\n", "\n", "print(f\"\\nSuccessfully loaded: {len(audio_data)} notes\")\n", "unique_srs = set(sample_rates.values())\n", "if len(unique_srs) == 1:\n", " global_sr = list(unique_srs)[0]\n", " print(f\"Consistent sample rate: {global_sr} Hz\")\n", "else:\n", " global_sr = max(unique_srs)\n", " print(f\"Multiple sample rates found, using: {global_sr} Hz\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "44d03e11", 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature extraction function defined\n" ] } ], "source": [ "def extract_comprehensive_features(audio_data, sample_rate):\n", " try:\n", " features = {}\n", " \n", " # MFCC Features\n", " mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)\n", " features['mfcc_mean'] = np.mean(mfcc, axis=1)\n", " features['mfcc_std'] = np.std(mfcc, axis=1)\n", " \n", " # Chroma Features\n", " chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)\n", " features['chroma_mean'] = np.mean(chroma, axis=1)\n", " features['chroma_std'] = np.std(chroma, axis=1)\n", " \n", " # Spectral Centroid\n", " spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)[0]\n", " features['spectral_centroid_mean'] = np.mean(spectral_centroid)\n", " features['spectral_centroid_std'] = np.std(spectral_centroid)\n", " \n", " # Zero Crossing Rate\n", " zcr = librosa.feature.zero_crossing_rate(audio_data)[0]\n", " features['zcr_mean'] = np.mean(zcr)\n", " features['zcr_std'] = np.std(zcr)\n", " \n", " # RMS Energy\n", " rms = librosa.feature.rms(y=audio_data)[0]\n", " features['rms_mean'] = np.mean(rms)\n", " features['rms_std'] = np.std(rms)\n", " \n", " # Spectral Rolloff\n", " spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_data, sr=sample_rate)[0]\n", " features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)\n", " features['spectral_rolloff_std'] = np.std(spectral_rolloff)\n", " \n", " # Spectral Bandwidth\n", " spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_data, sr=sample_rate)[0]\n", " features['spectral_bandwidth_mean'] = np.mean(spectral_bandwidth)\n", " features['spectral_bandwidth_std'] = np.std(spectral_bandwidth)\n", " \n", " # Tempo\n", " try:\n", " tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sample_rate)\n", " features['tempo'] = float(tempo.item() if hasattr(tempo, 'item') else tempo)\n", " except:\n", " 
features['tempo'] = 120.0\n", " \n", " # Pitch\n", " try:\n", " pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sample_rate)\n", " pitch_values = []\n", " for t in range(pitches.shape[1]):\n", " index = magnitudes[:, t].argmax()\n", " pitch = pitches[index, t]\n", " if pitch > 0:\n", " pitch_values.append(pitch)\n", " \n", " if pitch_values:\n", " features['pitch_mean'] = np.mean(pitch_values)\n", " features['pitch_std'] = np.std(pitch_values)\n", " else:\n", " features['pitch_mean'] = 0.0\n", " features['pitch_std'] = 0.0\n", " except:\n", " features['pitch_mean'] = 0.0\n", " features['pitch_std'] = 0.0\n", " \n", " # Flatten all features into a single vector\n", " feature_vector = []\n", " \n", " # Add scalar features\n", " scalar_features = [\n", " 'spectral_centroid_mean', 'spectral_centroid_std',\n", " 'zcr_mean', 'zcr_std',\n", " 'rms_mean', 'rms_std',\n", " 'spectral_rolloff_mean', 'spectral_rolloff_std',\n", " 'spectral_bandwidth_mean', 'spectral_bandwidth_std',\n", " 'tempo', 'pitch_mean', 'pitch_std'\n", " ]\n", " \n", " for feat in scalar_features:\n", " feature_vector.append(features[feat])\n", " \n", " # Add array features\n", " feature_vector.extend(features['mfcc_mean'].flatten())\n", " feature_vector.extend(features['mfcc_std'].flatten())\n", " feature_vector.extend(features['chroma_mean'].flatten())\n", " feature_vector.extend(features['chroma_std'].flatten())\n", " \n", " return np.array(feature_vector, dtype=np.float64)\n", " \n", " except Exception as e:\n", " print(f\"Feature extraction error: {e}\")\n", " return np.zeros(75, dtype=np.float64)\n", "\n", "print(\"Feature extraction function defined\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "0fd6d94e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Creating multi-class training dataset\n", "Processing 12 musical notes\n", "Processing Note A:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing 
Note A#:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note B:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note C:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note C#:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note D:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note D#:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note E:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note F:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note F#:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note G:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "Processing Note G#:\n", " Audio length: 20.13 seconds (443,904 samples)\n", " Created 12 snippets\n", "\n", "Total snippets: 144\n", "Class balance - Min: 12, Max: 12, Ratio: 1.00\n", "\n", "Extracting features from all snippets...\n", "Label encoding: {'A': 0, 'A#': 1, 'B': 2, 'C': 3, 'C#': 4, 'D': 5, 'D#': 6, 'E': 7, 'F': 8, 'F#': 9, 'G': 10, 'G#': 11}\n", "[1/12] Processing 12 snippets for Note A...\n", " 12/12 successful (100.0%)\n", "[2/12] Processing 12 snippets for Note A#...\n", " 12/12 successful (100.0%)\n", "[3/12] Processing 12 snippets for Note B...\n", " 12/12 successful (100.0%)\n", "[4/12] Processing 12 snippets for Note C...\n", " 12/12 successful (100.0%)\n", "[5/12] Processing 12 snippets for Note C#...\n", " 12/12 successful (100.0%)\n", "[6/12] Processing 12 snippets for Note D...\n", " 12/12 successful (100.0%)\n", "[7/12] Processing 12 snippets for Note D#...\n", " 12/12 successful (100.0%)\n", "[8/12] Processing 12 snippets for Note E...\n", " 
12/12 successful (100.0%)\n", "[9/12] Processing 12 snippets for Note F...\n", " 12/12 successful (100.0%)\n", "[10/12] Processing 12 snippets for Note F#...\n", " 12/12 successful (100.0%)\n", "[11/12] Processing 12 snippets for Note G...\n", " 12/12 successful (100.0%)\n", "[12/12] Processing 12 snippets for Note G#...\n", " 12/12 successful (100.0%)\n", "\n", "Dataset creation complete!\n", "Final dataset shape: (144, 63)\n", "Number of classes: 12\n", "\n", "Class distribution:\n", " A (Class 0): 12 samples ( 8.3%)\n", " A# (Class 1): 12 samples ( 8.3%)\n", " B (Class 2): 12 samples ( 8.3%)\n", " C (Class 3): 12 samples ( 8.3%)\n", " C# (Class 4): 12 samples ( 8.3%)\n", " D (Class 5): 12 samples ( 8.3%)\n", " D# (Class 6): 12 samples ( 8.3%)\n", " E (Class 7): 12 samples ( 8.3%)\n", " F (Class 8): 12 samples ( 8.3%)\n", " F# (Class 9): 12 samples ( 8.3%)\n", " G (Class 10): 12 samples ( 8.3%)\n", " G# (Class 11): 12 samples ( 8.3%)\n" ] } ], "source": [ "if audio_data:\n", " print(\"Creating multi-class training dataset\")\n", " print(f\"Processing {len(audio_data)} musical notes\")\n", " \n", " snippet_duration = 3\n", " overlap_ratio = 0.5\n", " \n", " all_snippets = {}\n", " snippet_counts = {}\n", " \n", " for note, y in audio_data.items():\n", " sr = sample_rates[note]\n", " samples_per_snippet = int(sr * snippet_duration)\n", " hop_length = int(samples_per_snippet * (1 - overlap_ratio))\n", " \n", " print(f\"Processing Note {note}:\")\n", " print(f\" Audio length: {len(y)/sr:.2f} seconds ({len(y):,} samples)\")\n", " \n", " snippets = []\n", " for start_idx in range(0, len(y) - samples_per_snippet, hop_length):\n", " end_idx = start_idx + samples_per_snippet\n", " snippet = y[start_idx:end_idx]\n", " \n", " if np.max(np.abs(snippet)) > 0.001:\n", " snippets.append(snippet)\n", " \n", " all_snippets[note] = snippets\n", " snippet_counts[note] = len(snippets)\n", " \n", " print(f\" Created {len(snippets)} snippets\")\n", " \n", " total_snippets = 
sum(snippet_counts.values())\n", " print(f\"\\nTotal snippets: {total_snippets}\")\n", " \n", " min_snippets = min(snippet_counts.values())\n", " max_snippets = max(snippet_counts.values())\n", " imbalance_ratio = max_snippets / min_snippets if min_snippets > 0 else float('inf')\n", " \n", " print(f\"Class balance - Min: {min_snippets}, Max: {max_snippets}, Ratio: {imbalance_ratio:.2f}\")\n", " \n", " print(f\"\\nExtracting features from all snippets...\")\n", " \n", " all_features = []\n", " all_labels = []\n", " \n", " label_encoder = LabelEncoder()\n", " note_names = list(all_snippets.keys())\n", " label_encoder.fit(note_names)\n", " \n", " print(f\"Label encoding: {dict(zip(note_names, label_encoder.transform(note_names)))}\")\n", " \n", " for note_idx, (note, snippets) in enumerate(all_snippets.items()):\n", " print(f\"[{note_idx+1}/{len(all_snippets)}] Processing {len(snippets)} snippets for Note {note}...\")\n", " \n", " note_features = []\n", " successful_extractions = 0\n", " failed_extractions = 0\n", " \n", " sr = sample_rates[note]\n", " encoded_label = label_encoder.transform([note])[0]\n", " \n", " for i, snippet in enumerate(snippets):\n", " try:\n", " features = extract_comprehensive_features(snippet, sr)\n", " \n", " if np.all(np.isfinite(features)) and not np.all(features == 0):\n", " note_features.append(features)\n", " all_labels.append(encoded_label)\n", " successful_extractions += 1\n", " else:\n", " failed_extractions += 1\n", " \n", " except Exception as e:\n", " failed_extractions += 1\n", " \n", " all_features.extend(note_features)\n", " print(f\" {successful_extractions}/{len(snippets)} successful ({successful_extractions/len(snippets)*100:.1f}%)\")\n", " \n", " X_multi = np.array(all_features)\n", " y_multi = np.array(all_labels)\n", " \n", " print(f\"\\nDataset creation complete!\")\n", " print(f\"Final dataset shape: {X_multi.shape}\")\n", " print(f\"Number of classes: {len(np.unique(y_multi))}\")\n", " \n", " unique_classes, 
class_counts = np.unique(y_multi, return_counts=True)\n", " print(f\"\\nClass distribution:\")\n", " for class_idx, count in zip(unique_classes, class_counts):\n", " note_name = label_encoder.inverse_transform([class_idx])[0]\n", " percentage = (count / len(y_multi)) * 100\n", " print(f\" {note_name} (Class {class_idx}): {count:4d} samples ({percentage:5.1f}%)\")\n", " \n", "else:\n", " print(\"No audio data available for dataset creation\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "c22eaf83", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defined 10 machine learning models\n", " K-Nearest Neighbors: Classifies based on 5 closest training samples\n", " Support Vector Machine: Multi-class SVM with RBF kernel\n", " Random Forest: Ensemble of 200 decision trees\n", " Logistic Regression: One-vs-Rest logistic regression\n", " Decision Tree: Decision tree with controlled depth\n", " Naive Bayes: Gaussian Naive Bayes\n", " Neural Network: Multi-layer perceptron with 3 hidden layers\n", " Gradient Boosting: Gradient boosting with 200 estimators\n", " AdaBoost: AdaBoost with SAMME algorithm\n", " Linear Discriminant Analysis: LDA for linear combinations\n" ] } ], "source": [ "models_multi = {\n", " 'K-Nearest Neighbors': {\n", " 'model': KNeighborsClassifier(n_neighbors=5),\n", " 'explanation': 'Classifies based on 5 closest training samples'\n", " },\n", " 'Support Vector Machine': {\n", " 'model': SVC(kernel='rbf', probability=True, random_state=42, C=1.0),\n", " 'explanation': 'Multi-class SVM with RBF kernel'\n", " },\n", " 'Random Forest': {\n", " 'model': RandomForestClassifier(n_estimators=200, random_state=42, max_depth=15),\n", " 'explanation': 'Ensemble of 200 decision trees'\n", " },\n", " 'Logistic Regression': {\n", " 'model': LogisticRegression(random_state=42, max_iter=2000, multi_class='ovr'),\n", " 'explanation': 'One-vs-Rest logistic regression'\n", " },\n", " 'Decision Tree': {\n", " 'model': 
DecisionTreeClassifier(random_state=42, max_depth=20, min_samples_split=5),\n", " 'explanation': 'Decision tree with controlled depth'\n", " },\n", " 'Naive Bayes': {\n", " 'model': GaussianNB(),\n", " 'explanation': 'Gaussian Naive Bayes'\n", " },\n", " 'Neural Network': {\n", " 'model': MLPClassifier(\n", " hidden_layer_sizes=(200, 100, 50), \n", " random_state=42, \n", " max_iter=2000,\n", " alpha=0.001,\n", " learning_rate_init=0.001\n", " ),\n", " 'explanation': 'Multi-layer perceptron with 3 hidden layers'\n", " },\n", " 'Gradient Boosting': {\n", " 'model': GradientBoostingClassifier(\n", " n_estimators=200, \n", " random_state=42,\n", " learning_rate=0.1,\n", " max_depth=8\n", " ),\n", " 'explanation': 'Gradient boosting with 200 estimators'\n", " },\n", " 'AdaBoost': {\n", " 'model': AdaBoostClassifier(\n", " n_estimators=100,\n", " random_state=42,\n", " learning_rate=1.0,\n", " algorithm='SAMME'\n", " ),\n", " 'explanation': 'AdaBoost with SAMME algorithm'\n", " },\n", " 'Linear Discriminant Analysis': {\n", " 'model': LinearDiscriminantAnalysis(),\n", " 'explanation': 'LDA for linear combinations'\n", " }\n", "}\n", "\n", "print(f\"Defined {len(models_multi)} machine learning models\")\n", "for name, info in models_multi.items():\n", " print(f\" {name}: {info['explanation']}\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "42c698a9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Preparing multi-class dataset for training\n", "Dataset Overview:\n", " Samples: 144\n", " Features: 63\n", " Classes: 12\n", " No NaN/infinite values found\n", " All features have non-zero variance\n", " Feature ranges: -560.679 to 1533.001\n", "\n", "Class balance metrics:\n", " Smallest class: 12 samples\n", " Largest class: 12 samples\n", " Imbalance ratio: 1.00\n", " Good class balance\n", "\n", "Splitting dataset with stratification\n", "Dataset split complete:\n", " Training: 100 samples (69.4%)\n", " Validation: 15 
samples (10.4%)\n", " Testing: 29 samples (20.1%)\n", "Applying feature scaling...\n", "Scaling complete:\n", " Training mean: 0.000000\n", " Training std: 1.000000\n", " Feature range: -4.559 to 6.075\n", "\n", "Multi-class dataset ready for machine learning\n", " 100 training samples\n", " 15 validation samples\n", " 29 test samples\n", " 12 musical note classes\n" ] } ], "source": [ "if 'X_multi' in locals() and 'y_multi' in locals():\n", " print(\"Preparing multi-class dataset for training\")\n", " \n", " print(f\"Dataset Overview:\")\n", " print(f\" Samples: {X_multi.shape[0]:,}\")\n", " print(f\" Features: {X_multi.shape[1]}\")\n", " print(f\" Classes: {len(np.unique(y_multi))}\")\n", " \n", " # Data quality checks\n", " nan_samples = np.any(~np.isfinite(X_multi), axis=1)\n", " nan_count = np.sum(nan_samples)\n", " \n", " if nan_count > 0:\n", " print(f\" Found {nan_count} samples with NaN/infinite values - removing\")\n", " valid_mask = ~nan_samples\n", " X_multi = X_multi[valid_mask]\n", " y_multi = y_multi[valid_mask]\n", " print(f\" Dataset cleaned: {X_multi.shape[0]:,} valid samples remaining\")\n", " else:\n", " print(f\" No NaN/infinite values found\")\n", " \n", " feature_variances = np.var(X_multi, axis=0)\n", " zero_var_features = np.sum(feature_variances == 0)\n", " \n", " if zero_var_features > 0:\n", " print(f\" Found {zero_var_features} zero-variance features\")\n", " else:\n", " print(f\" All features have non-zero variance\")\n", " \n", " print(f\" Feature ranges: {X_multi.min():.3f} to {X_multi.max():.3f}\")\n", " \n", " unique_classes, class_counts = np.unique(y_multi, return_counts=True)\n", " min_class_size = np.min(class_counts)\n", " max_class_size = np.max(class_counts)\n", " class_imbalance = max_class_size / min_class_size\n", " \n", " print(f\"\\nClass balance metrics:\")\n", " print(f\" Smallest class: {min_class_size} samples\")\n", " print(f\" Largest class: {max_class_size} samples\")\n", " print(f\" Imbalance ratio: 
{class_imbalance:.2f}\")\n", " \n", " if class_imbalance > 3.0:\n", " print(f\" WARNING: Significant class imbalance detected\")\n", " elif class_imbalance > 2.0:\n", " print(f\" Moderate class imbalance - using stratified split\")\n", " else:\n", " print(f\" Good class balance\")\n", " \n", " print(f\"\\nSplitting dataset with stratification\")\n", " \n", " test_size = 0.2\n", " val_size = 0.1\n", " \n", " X_temp, X_test_multi, y_temp, y_test_multi = train_test_split(\n", " X_multi, y_multi, \n", " test_size=test_size, \n", " random_state=42, \n", " stratify=y_multi\n", " )\n", " \n", " X_train_multi, X_val_multi, y_train_multi, y_val_multi = train_test_split(\n", " X_temp, y_temp,\n", " test_size=val_size/(1-test_size),\n", " random_state=42,\n", " stratify=y_temp\n", " )\n", " \n", " print(f\"Dataset split complete:\")\n", " print(f\" Training: {X_train_multi.shape[0]:,} samples ({X_train_multi.shape[0]/len(X_multi)*100:.1f}%)\")\n", " print(f\" Validation: {X_val_multi.shape[0]:,} samples ({X_val_multi.shape[0]/len(X_multi)*100:.1f}%)\")\n", " print(f\" Testing: {X_test_multi.shape[0]:,} samples ({X_test_multi.shape[0]/len(X_multi)*100:.1f}%)\")\n", " \n", " scaler_multi = StandardScaler()\n", " \n", " print(f\"Applying feature scaling...\")\n", " X_train_scaled_multi = scaler_multi.fit_transform(X_train_multi)\n", " X_val_scaled_multi = scaler_multi.transform(X_val_multi)\n", " X_test_scaled_multi = scaler_multi.transform(X_test_multi)\n", " \n", " print(f\"Scaling complete:\")\n", " print(f\" Training mean: {X_train_scaled_multi.mean():.6f}\")\n", " print(f\" Training std: {X_train_scaled_multi.std():.6f}\")\n", " print(f\" Feature range: {X_train_scaled_multi.min():.3f} to {X_train_scaled_multi.max():.3f}\")\n", " \n", " print(f\"\\nMulti-class dataset ready for machine learning\")\n", " print(f\" {X_train_scaled_multi.shape[0]:,} training samples\")\n", " print(f\" {X_val_scaled_multi.shape[0]:,} validation samples\") \n", " print(f\" 
{X_test_scaled_multi.shape[0]:,} test samples\")\n", " print(f\" {len(np.unique(y_multi))} musical note classes\")\n", " \n", "else:\n", " print(\"Multi-class dataset not found. Please run dataset creation first.\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "98b41024", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Comprehensive multi-class model training\n", "Training 10 models on 12 musical note classes\n", "Training set: 100 samples\n", "Validation set: 15 samples\n", "Test set: 29 samples\n", "[ 1/10] K-Nearest Neighbors\n", " Training K-Nearest Neighbors...\n", " Train Accuracy: 0.9500\n", " Val Accuracy: 0.8000\n", " Test Accuracy: 0.8966\n", " CV Score: 0.9600 (+/- 0.0374)\n", " Training Time: 0.0028s\n", " Overfitting: 0.1500\n", " F1 Score: 0.7556\n", " WARNING: High overfitting detected\n", "[ 2/10] Support Vector Machine\n", " Training Support Vector Machine...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 0.8000\n", " Test Accuracy: 0.8966\n", " CV Score: 0.9400 (+/- 0.0490)\n", " Training Time: 0.0110s\n", " Overfitting: 0.2000\n", " F1 Score: 0.7361\n", " WARNING: High overfitting detected\n", "[ 3/10] Random Forest\n", " Training Random Forest...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 1.0000\n", " Test Accuracy: 1.0000\n", " CV Score: 0.9900 (+/- 0.0200)\n", " Training Time: 0.1802s\n", " Overfitting: 0.0000\n", " F1 Score: 1.0000\n", "[ 4/10] Logistic Regression\n", " Training Logistic Regression...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 1.0000\n", " Test Accuracy: 1.0000\n", " CV Score: 1.0000 (+/- 0.0000)\n", " Training Time: 0.0136s\n", " Overfitting: 0.0000\n", " F1 Score: 1.0000\n", "[ 5/10] Decision Tree\n", " Training Decision Tree...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 0.9333\n", " Test Accuracy: 0.9310\n", " CV Score: 0.9700 (+/- 0.0245)\n", " Training Time: 0.0028s\n", " Overfitting: 0.0667\n", " F1 Score: 0.9000\n", "[ 6/10] Naive Bayes\n", " Training 
Naive Bayes...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 0.7333\n", " Test Accuracy: 0.8621\n", " CV Score: 0.8700 (+/- 0.0510)\n", " Training Time: 0.0005s\n", " Overfitting: 0.2667\n", " F1 Score: 0.6667\n", " WARNING: High overfitting detected\n", "[ 7/10] Neural Network\n", " Training Neural Network...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 0.8000\n", " Test Accuracy: 0.9310\n", " CV Score: 0.9500 (+/- 0.0548)\n", " Training Time: 0.3687s\n", " Overfitting: 0.2000\n", " F1 Score: 0.7361\n", " WARNING: High overfitting detected\n", "[ 8/10] Gradient Boosting\n", " Training Gradient Boosting...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 1.0000\n", " Test Accuracy: 1.0000\n", " CV Score: 1.0000 (+/- 0.0000)\n", " Training Time: 1.4237s\n", " Overfitting: 0.0000\n", " F1 Score: 1.0000\n", "[ 9/10] AdaBoost\n", " Training AdaBoost...\n", " Train Accuracy: 0.6800\n", " Val Accuracy: 0.4667\n", " Test Accuracy: 0.6552\n", " CV Score: 0.5100 (+/- 0.3813)\n", " Training Time: 0.0927s\n", " Overfitting: 0.2133\n", " F1 Score: 0.4630\n", " WARNING: High overfitting detected\n", "[10/10] Linear Discriminant Analysis\n", " Training Linear Discriminant Analysis...\n", " Train Accuracy: 1.0000\n", " Val Accuracy: 1.0000\n", " Test Accuracy: 1.0000\n", " CV Score: 1.0000 (+/- 0.0000)\n", " Training Time: 0.0075s\n", " Overfitting: 0.0000\n", " F1 Score: 1.0000\n", "\n", "Training completed for 10 models\n" ] } ], "source": [ "def evaluate_multiclass_model(name, model, X_train, X_val, X_test, y_train, y_val, y_test):\n", " results = {'name': name}\n", " \n", " print(f\" Training {name}...\")\n", " \n", " start_time = time.time()\n", " model.fit(X_train, y_train)\n", " training_time = time.time() - start_time\n", " results['training_time'] = training_time\n", " \n", " start_time = time.time()\n", " y_val_pred = model.predict(X_val)\n", " val_prediction_time = time.time() - start_time\n", " results['val_prediction_time'] = val_prediction_time\n", " \n", " 
start_time = time.time()\n", " y_test_pred = model.predict(X_test)\n", " test_prediction_time = time.time() - start_time\n", " results['test_prediction_time'] = test_prediction_time\n", " \n", " train_accuracy = model.score(X_train, y_train)\n", " val_accuracy = accuracy_score(y_val, y_val_pred)\n", " test_accuracy = accuracy_score(y_test, y_test_pred)\n", " \n", " results['train_accuracy'] = train_accuracy\n", " results['val_accuracy'] = val_accuracy\n", " results['test_accuracy'] = test_accuracy\n", " \n", " overfitting = train_accuracy - val_accuracy\n", " results['overfitting'] = overfitting\n", " \n", " try:\n", " cv_folds = min(5, len(np.unique(y_train)))\n", " if cv_folds >= 3:\n", " cv_scores = cross_val_score(model, X_train, y_train, cv=cv_folds, scoring='accuracy')\n", " results['cv_mean'] = cv_scores.mean()\n", " results['cv_std'] = cv_scores.std()\n", " else:\n", " results['cv_mean'] = val_accuracy\n", " results['cv_std'] = 0.0\n", " except Exception as e:\n", " results['cv_mean'] = val_accuracy\n", " results['cv_std'] = 0.0\n", " \n", " try:\n", " class_report = classification_report(y_val, y_val_pred, output_dict=True, zero_division=0)\n", " results['precision_macro'] = class_report['macro avg']['precision']\n", " results['recall_macro'] = class_report['macro avg']['recall']\n", " results['f1_macro'] = class_report['macro avg']['f1-score']\n", " except:\n", " results['precision_macro'] = 0.0\n", " results['recall_macro'] = 0.0\n", " results['f1_macro'] = 0.0\n", " \n", " return results, y_val_pred, y_test_pred\n", "\n", "if 'X_train_scaled_multi' in locals():\n", " print(\"Comprehensive multi-class model training\")\n", " print(f\"Training {len(models_multi)} models on {len(np.unique(y_train_multi))} musical note classes\")\n", " print(f\"Training set: {X_train_scaled_multi.shape[0]:,} samples\")\n", " print(f\"Validation set: {X_val_scaled_multi.shape[0]:,} samples\")\n", " print(f\"Test set: {X_test_scaled_multi.shape[0]:,} samples\")\n", " \n", " 
all_results_multi = []\n", " trained_models_multi = {}\n", " model_predictions = {}\n", " \n", " total_models = len(models_multi)\n", " \n", " for i, (name, info) in enumerate(models_multi.items(), 1):\n", " print(f\"[{i:2d}/{total_models}] {name}\")\n", " \n", " try:\n", " model = info['model']\n", " \n", " results, y_val_pred, y_test_pred = evaluate_multiclass_model(\n", " name, model, \n", " X_train_scaled_multi, X_val_scaled_multi, X_test_scaled_multi,\n", " y_train_multi, y_val_multi, y_test_multi\n", " )\n", " \n", " all_results_multi.append(results)\n", " trained_models_multi[name] = model\n", " model_predictions[name] = {\n", " 'val_pred': y_val_pred,\n", " 'test_pred': y_test_pred\n", " }\n", " \n", " print(f\" Train Accuracy: {results['train_accuracy']:.4f}\")\n", " print(f\" Val Accuracy: {results['val_accuracy']:.4f}\")\n", " print(f\" Test Accuracy: {results['test_accuracy']:.4f}\")\n", " print(f\" CV Score: {results['cv_mean']:.4f} (+/- {results['cv_std']:.4f})\")\n", " print(f\" Training Time: {results['training_time']:.4f}s\")\n", " print(f\" Overfitting: {results['overfitting']:.4f}\")\n", " print(f\" F1 Score: {results['f1_macro']:.4f}\")\n", " \n", " if results['overfitting'] > 0.1:\n", " print(f\" WARNING: High overfitting detected\")\n", " \n", " except Exception as e:\n", " print(f\" ERROR training {name}: {e}\")\n", " continue\n", " \n", " print(f\"\\nTraining completed for {len(all_results_multi)} models\")\n", " \n", "else:\n", " print(\"Scaled training data not available. Run dataset preparation first.\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "d159c928", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model Performance Summary\n", "================================================================================\n", "FINAL MODEL RANKINGS (by Test Accuracy):\n", "--------------------------------------------------------------------------------\n", " 1. 
# --- Rank and summarise the trained multi-class models ---
if 'all_results_multi' in locals() and all_results_multi:
    print("Model Performance Summary")
    print("=" * 80)

    # One row per model; best test accuracy first.
    results_df = pd.DataFrame(all_results_multi).sort_values('test_accuracy', ascending=False)

    print("FINAL MODEL RANKINGS (by Test Accuracy):")
    print("-" * 80)

    for rank, (_, entry) in enumerate(results_df.iterrows(), start=1):
        print(f"{rank:2d}. {entry['name']}")
        print(f" Test Accuracy: {entry['test_accuracy']:.4f}")
        print(f" Val Accuracy: {entry['val_accuracy']:.4f}")
        print(f" CV Score: {entry['cv_mean']:.4f} (+/- {entry['cv_std']:.4f})")
        print(f" Training Time: {entry['training_time']:.4f}s")
        print(f" F1 Score: {entry['f1_macro']:.4f}")
        print(f" Overfitting: {entry['overfitting']:.4f}")
        print()

    # Category winners; idxmin/idxmax resolve ties to the first occurrence.
    best_accuracy = results_df.iloc[0]
    fastest_training = results_df.loc[results_df['training_time'].idxmin()]
    best_cv = results_df.loc[results_df['cv_mean'].idxmax()]
    least_overfitting = results_df.loc[results_df['overfitting'].idxmin()]

    print("KEY PERFORMANCE INSIGHTS:")
    print(f"Best Test Accuracy: {best_accuracy['name']} ({best_accuracy['test_accuracy']:.4f})")
    print(f"Fastest Training: {fastest_training['name']} ({fastest_training['training_time']:.4f}s)")
    print(f"Best Cross-Validation: {best_cv['name']} ({best_cv['cv_mean']:.4f})")
    print(f"Least Overfitting: {least_overfitting['name']} ({least_overfitting['overfitting']:.4f})")

    # Threshold-based cohort counts.
    high_accuracy_models = len(results_df[results_df['test_accuracy'] > 0.90])
    fast_models = len(results_df[results_df['training_time'] < 1.0])
    stable_models = len(results_df[results_df['overfitting'] < 0.1])

    print(f"\nPERFORMANCE STATISTICS:")
    print(f"High Accuracy Models (>90%): {high_accuracy_models}/{len(results_df)}")
    print(f"Fast Training Models (<1s): {fast_models}/{len(results_df)}")
    print(f"Stable Models (low overfitting): {stable_models}/{len(results_df)}")

    mean_accuracy = results_df['test_accuracy'].mean()
    std_accuracy = results_df['test_accuracy'].std()
    mean_training_time = results_df['training_time'].mean()

    print(f"\nOVERALL STATISTICS:")
    print(f"Mean Test Accuracy: {mean_accuracy:.4f} (+/- {std_accuracy:.4f})")
    print(f"Mean Training Time: {mean_training_time:.4f}s")

    print(f"\nAll {len(trained_models_multi)} models successfully trained and evaluated")
    print(f"Dataset: {len(np.unique(y_multi))} musical notes, {X_multi.shape[0]} samples, {X_multi.shape[1]} features")

else:
    print("No results available. Run model training first.")
{mean_accuracy:.4f} (+/- {std_accuracy:.4f})\")\n", " print(f\"Mean Training Time: {mean_training_time:.4f}s\")\n", " \n", " print(f\"\\nAll {len(trained_models_multi)} models successfully trained and evaluated\")\n", " print(f\"Dataset: {len(np.unique(y_multi))} musical notes, {X_multi.shape[0]} samples, {X_multi.shape[1]} features\")\n", " \n", "else:\n", " print(\"No results available. Run model training first.\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "d568577d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing functions defined successfully\n" ] } ], "source": [ "# Model Testing Functions\n", "def test_single_audio_file(file_path, models_dict, scaler, label_encoder, feature_extractor):\n", " \"\"\"Test a single audio file with all trained models\"\"\"\n", " print(f\"Testing audio file: {file_path}\")\n", " \n", " try:\n", " # Load audio\n", " y, sr = librosa.load(file_path)\n", " duration = len(y) / sr\n", " print(f\"Audio duration: {duration:.2f}s, Sample rate: {sr} Hz\")\n", " \n", " # Play audio\n", " print(\"Playing audio:\")\n", " display(ipd.Audio(y, rate=sr))\n", " \n", " # Extract features\n", " features = feature_extractor(y, sr)\n", " features_scaled = scaler.transform(features.reshape(1, -1))\n", " \n", " print(f\"\\nPredictions from all {len(models_dict)} models:\")\n", " print(\"-\" * 50)\n", " \n", " predictions = {}\n", " probabilities = {}\n", " \n", " for name, model in models_dict.items():\n", " pred_encoded = model.predict(features_scaled)[0]\n", " pred_note = label_encoder.inverse_transform([pred_encoded])[0]\n", " predictions[name] = pred_note\n", " \n", " # Get probability if available\n", " if hasattr(model, 'predict_proba'):\n", " proba = model.predict_proba(features_scaled)[0]\n", " confidence = proba.max()\n", " probabilities[name] = confidence\n", " print(f\"{name:<25}: {pred_note} (confidence: {confidence:.3f})\")\n", " else:\n", " probabilities[name] = 1.0\n", " 
def test_random_snippets(audio_data_dict, sample_rates_dict, models_dict, scaler, label_encoder, feature_extractor, n_tests=3):
    """Test random snippets from loaded audio files.

    Parameters
    ----------
    audio_data_dict : dict
        Note name -> 1-D audio signal (as loaded earlier in the notebook).
    sample_rates_dict : dict
        Note name -> sample rate in Hz for the matching signal.
    models_dict : dict
        Model name -> fitted classifier.
    scaler, label_encoder, feature_extractor :
        The preprocessing objects used at training time.
    n_tests : int
        Number of random snippets to evaluate.

    A snippet test "passes" when at least half of the models predict the
    true note; an overall pass rate is printed at the end.
    """
    print(f"Testing {n_tests} random snippets from loaded audio files")
    print("=" * 60)

    import random

    correct_predictions = 0
    total_predictions = 0

    for test_num in range(n_tests):
        print(f"\nTest {test_num + 1}/{n_tests}:")
        print("-" * 30)

        # Select a random note and cut a random snippet from its recording.
        note = random.choice(list(audio_data_dict.keys()))
        y = audio_data_dict[note]
        sr = sample_rates_dict[note]

        snippet_duration = 3  # seconds
        max_start = len(y) - int(sr * snippet_duration)
        if max_start > 0:
            start_idx = random.randint(0, max_start)
            end_idx = start_idx + int(sr * snippet_duration)
            snippet = y[start_idx:end_idx]
        else:
            # BUGFIX: start_idx/end_idx were previously undefined on this
            # branch, so the print below raised NameError for clips shorter
            # than the snippet length. Use the full-clip bounds instead.
            start_idx, end_idx = 0, len(y)
            snippet = y  # Use full audio if too short

        print(f"True Note: {note}")
        print(f"Testing snippet from {start_idx/sr:.1f}s to {end_idx/sr:.1f}s")

        print("Playing test snippet:")
        display(ipd.Audio(snippet, rate=sr))

        # Score the snippet with every model.
        try:
            features = feature_extractor(snippet, sr)
            features_scaled = scaler.transform(features.reshape(1, -1))

            model_predictions = {}
            correct_count = 0

            for name, model in models_dict.items():
                pred_encoded = model.predict(features_scaled)[0]
                pred_note = label_encoder.inverse_transform([pred_encoded])[0]
                model_predictions[name] = pred_note

                if pred_note == note:
                    correct_count += 1

            accuracy = correct_count / len(models_dict)
            print(f"\nModel Accuracy: {correct_count}/{len(models_dict)} ({accuracy:.1%})")

            print("Individual Predictions:")
            for name, pred in model_predictions.items():
                status = "βœ“" if pred == note else "βœ—"
                print(f" {status} {name}: {pred}")

            # A test passes when at least half of the models agree with truth.
            if accuracy >= 0.5:
                correct_predictions += 1
            total_predictions += 1

        except Exception as e:
            print(f"Error in prediction: {e}")

    print(f"\n" + "=" * 60)
    print(f"Overall Test Results:")
    # BUGFIX: guard against ZeroDivisionError when every snippet errored out.
    if total_predictions > 0:
        print(f"Tests passed: {correct_predictions}/{total_predictions} ({correct_predictions/total_predictions:.1%})")
    else:
        print("Tests passed: 0/0 (no snippets could be evaluated)")

print("Testing functions defined successfully")
Testing Random Snippets from Loaded Audio Files\n", "Testing 3 random snippets from loaded audio files\n", "============================================================\n", "\n", "Test 1/3:\n", "------------------------------\n", "True Note: G#\n", "Testing snippet from 5.4s to 8.4s\n", "Playing test snippet:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Model Accuracy: 9/10 (90.0%)\n", "Individual Predictions:\n", " βœ“ K-Nearest Neighbors: G#\n", " βœ“ Support Vector Machine: G#\n", " βœ“ Random Forest: G#\n", " βœ“ Logistic Regression: G#\n", " βœ“ Decision Tree: G#\n", " βœ“ Naive Bayes: G#\n", " βœ“ Neural Network: G#\n", " βœ“ Gradient Boosting: G#\n", " βœ— AdaBoost: B\n", " βœ“ Linear Discriminant Analysis: G#\n", "\n", "Test 2/3:\n", "------------------------------\n", "True Note: D#\n", "Testing snippet from 6.0s to 9.0s\n", "Playing test snippet:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Model Accuracy: 10/10 (100.0%)\n", "Individual Predictions:\n", " βœ“ K-Nearest Neighbors: D#\n", " βœ“ Support Vector Machine: D#\n", " βœ“ Random Forest: D#\n", " βœ“ Logistic Regression: D#\n", " βœ“ Decision Tree: D#\n", " βœ“ Naive Bayes: D#\n", " βœ“ Neural Network: D#\n", " βœ“ Gradient Boosting: D#\n", " βœ“ AdaBoost: D#\n", " βœ“ Linear Discriminant Analysis: D#\n", "\n", "Test 3/3:\n", "------------------------------\n", "True Note: C#\n", "Testing snippet from 5.8s to 8.8s\n", "Playing test snippet:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Model Accuracy: 10/10 (100.0%)\n", "Individual Predictions:\n", " βœ“ K-Nearest 
Neighbors: C#\n", " βœ“ Support Vector Machine: C#\n", " βœ“ Random Forest: C#\n", " βœ“ Logistic Regression: C#\n", " βœ“ Decision Tree: C#\n", " βœ“ Naive Bayes: C#\n", " βœ“ Neural Network: C#\n", " βœ“ Gradient Boosting: C#\n", " βœ“ AdaBoost: C#\n", " βœ“ Linear Discriminant Analysis: C#\n", "\n", "============================================================\n", "Overall Test Results:\n", "Tests passed: 3/3 (100.0%)\n", "\n", "\n", "2. Testing Full Audio Files\n", "------------------------------\n", "\n", "Testing full file for Note A:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteA.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : A (confidence: 1.000)\n", "Support Vector Machine : B (confidence: 0.133)\n", "Random Forest : A (confidence: 0.600)\n", "Logistic Regression : A (confidence: 0.938)\n", "Decision Tree : A (confidence: 1.000)\n", "Naive Bayes : C (confidence: 1.000)\n", "Neural Network : A (confidence: 0.639)\n", "Gradient Boosting : A (confidence: 1.000)\n", "AdaBoost : A (confidence: 0.084)\n", "Linear Discriminant Analysis: A (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: A (80.0% agreement)\n", "Vote Distribution: {'A': 8, 'B': 1, 'C': 1}\n", "βœ“ CORRECT: Ensemble predicted A\n", "\n", "Testing full file for Note A#:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteA#.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", 
"--------------------------------------------------\n", "K-Nearest Neighbors : A# (confidence: 1.000)\n", "Support Vector Machine : A# (confidence: 0.136)\n", "Random Forest : A# (confidence: 0.570)\n", "Logistic Regression : A# (confidence: 0.931)\n", "Decision Tree : A# (confidence: 1.000)\n", "Naive Bayes : C (confidence: 1.000)\n", "Neural Network : A# (confidence: 0.728)\n", "Gradient Boosting : A# (confidence: 1.000)\n", "AdaBoost : B (confidence: 0.084)\n", "Linear Discriminant Analysis: A# (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: A# (80.0% agreement)\n", "Vote Distribution: {'A#': 8, 'C': 1, 'B': 1}\n", "βœ“ CORRECT: Ensemble predicted A#\n", "\n", "Testing full file for Note B:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteB.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : B (confidence: 1.000)\n", "Support Vector Machine : B (confidence: 0.198)\n", "Random Forest : B (confidence: 0.735)\n", "Logistic Regression : B (confidence: 0.969)\n", "Decision Tree : B (confidence: 1.000)\n", "Naive Bayes : C (confidence: 1.000)\n", "Neural Network : B (confidence: 0.994)\n", "Gradient Boosting : B (confidence: 1.000)\n", "AdaBoost : B (confidence: 0.084)\n", "Linear Discriminant Analysis: B (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: B (90.0% agreement)\n", "Vote Distribution: {'B': 9, 'C': 1}\n", "βœ“ CORRECT: Ensemble predicted B\n", "\n", "Testing full file for Note C:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteC.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], 
"text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : C (confidence: 1.000)\n", "Support Vector Machine : C (confidence: 0.208)\n", "Random Forest : C (confidence: 0.775)\n", "Logistic Regression : C (confidence: 0.955)\n", "Decision Tree : C (confidence: 1.000)\n", "Naive Bayes : C (confidence: 1.000)\n", "Neural Network : C (confidence: 0.995)\n", "Gradient Boosting : C (confidence: 1.000)\n", "AdaBoost : C (confidence: 0.084)\n", "Linear Discriminant Analysis: C (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: C (100.0% agreement)\n", "Vote Distribution: {'C': 10}\n", "βœ“ CORRECT: Ensemble predicted C\n", "\n", "Testing full file for Note C#:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteC#.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : C# (confidence: 1.000)\n", "Support Vector Machine : C# (confidence: 0.205)\n", "Random Forest : C# (confidence: 0.745)\n", "Logistic Regression : C# (confidence: 0.966)\n", "Decision Tree : C# (confidence: 1.000)\n", "Naive Bayes : C# (confidence: 1.000)\n", "Neural Network : C# (confidence: 0.968)\n", "Gradient Boosting : C# (confidence: 1.000)\n", "AdaBoost : C# (confidence: 0.084)\n", "Linear Discriminant Analysis: C# (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: C# (100.0% agreement)\n", "Vote Distribution: {'C#': 10}\n", "βœ“ CORRECT: Ensemble predicted C#\n", "\n", "Testing full file for Note D:\n", "Testing audio file: 
/Users/Parag/Documents/TMRT_data/noteD.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : D (confidence: 1.000)\n", "Support Vector Machine : D (confidence: 0.212)\n", "Random Forest : D (confidence: 0.730)\n", "Logistic Regression : D (confidence: 0.953)\n", "Decision Tree : D (confidence: 1.000)\n", "Naive Bayes : D (confidence: 1.000)\n", "Neural Network : D (confidence: 0.920)\n", "Gradient Boosting : D (confidence: 1.000)\n", "AdaBoost : D (confidence: 0.084)\n", "Linear Discriminant Analysis: D (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: D (100.0% agreement)\n", "Vote Distribution: {'D': 10}\n", "βœ“ CORRECT: Ensemble predicted D\n", "\n", "Testing full file for Note D#:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteD#.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : D# (confidence: 1.000)\n", "Support Vector Machine : D# (confidence: 0.215)\n", "Random Forest : D# (confidence: 0.735)\n", "Logistic Regression : D# (confidence: 0.968)\n", "Decision Tree : D# (confidence: 1.000)\n", "Naive Bayes : D# (confidence: 1.000)\n", "Neural Network : D# (confidence: 0.923)\n", "Gradient Boosting : D# (confidence: 1.000)\n", "AdaBoost : D# (confidence: 0.084)\n", "Linear Discriminant Analysis: D# (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: D# (100.0% 
agreement)\n", "Vote Distribution: {'D#': 10}\n", "βœ“ CORRECT: Ensemble predicted D#\n", "\n", "Testing full file for Note E:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteE.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : E (confidence: 1.000)\n", "Support Vector Machine : E (confidence: 0.134)\n", "Random Forest : E (confidence: 0.595)\n", "Logistic Regression : E (confidence: 0.965)\n", "Decision Tree : E (confidence: 1.000)\n", "Naive Bayes : F# (confidence: 1.000)\n", "Neural Network : E (confidence: 0.927)\n", "Gradient Boosting : E (confidence: 1.000)\n", "AdaBoost : B (confidence: 0.084)\n", "Linear Discriminant Analysis: E (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: E (80.0% agreement)\n", "Vote Distribution: {'E': 8, 'F#': 1, 'B': 1}\n", "βœ“ CORRECT: Ensemble predicted E\n", "\n", "Testing full file for Note F:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteF.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : F (confidence: 1.000)\n", "Support Vector Machine : F (confidence: 0.141)\n", "Random Forest : F (confidence: 0.615)\n", "Logistic Regression : F (confidence: 0.959)\n", "Decision Tree : E (confidence: 1.000)\n", "Naive Bayes : F# (confidence: 1.000)\n", "Neural Network : F (confidence: 0.489)\n", "Gradient Boosting : F (confidence: 1.000)\n", 
"AdaBoost : B (confidence: 0.084)\n", "Linear Discriminant Analysis: F (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: F (70.0% agreement)\n", "Vote Distribution: {'F': 7, 'E': 1, 'F#': 1, 'B': 1}\n", "βœ“ CORRECT: Ensemble predicted F\n", "\n", "Testing full file for Note F#:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteF#.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : F# (confidence: 1.000)\n", "Support Vector Machine : F# (confidence: 0.189)\n", "Random Forest : F# (confidence: 0.715)\n", "Logistic Regression : F# (confidence: 0.969)\n", "Decision Tree : F# (confidence: 1.000)\n", "Naive Bayes : F# (confidence: 1.000)\n", "Neural Network : F# (confidence: 0.990)\n", "Gradient Boosting : F# (confidence: 1.000)\n", "AdaBoost : F# (confidence: 0.084)\n", "Linear Discriminant Analysis: F# (confidence: 1.000)\n", "\n", "Ensemble Results:\n", "Majority Vote: F# (100.0% agreement)\n", "Vote Distribution: {'F#': 10}\n", "βœ“ CORRECT: Ensemble predicted F#\n", "\n", "Testing full file for Note G:\n", "Testing audio file: /Users/Parag/Documents/TMRT_data/noteG.mp3\n", "Audio duration: 20.13s, Sample rate: 22050 Hz\n", "Playing audio:\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predictions from all 10 models:\n", "--------------------------------------------------\n", "K-Nearest Neighbors : G (confidence: 1.000)\n", "Support Vector Machine : F# (confidence: 0.145)\n", "Random Forest : G (confidence: 0.455)\n", "Logistic Regression : G (confidence: 0.944)\n", 
# Interactive Testing Interface and Run Tests
def run_comprehensive_tests():
    """Run comprehensive tests on the trained models.

    Relies on notebook globals: audio_data, sample_rates, audio_files,
    trained_models_multi, scaler_multi, label_encoder, and
    extract_comprehensive_features. Prints results; returns nothing.
    """
    print("Comprehensive Model Testing Suite")
    print("=" * 50)

    # Test 1: Random snippets from loaded audio
    print("\n1. Testing Random Snippets from Loaded Audio Files")
    test_random_snippets(
        audio_data, sample_rates, trained_models_multi,
        scaler_multi, label_encoder, extract_comprehensive_features,
        n_tests=3
    )

    # Test 2: Full file predictions
    print(f"\n\n2. Testing Full Audio Files")
    print("-" * 30)

    for note, files in audio_files.items():
        if len(files) > 0:
            print(f"\nTesting full file for Note {note}:")
            predictions, probs, majority = test_single_audio_file(
                files[0], trained_models_multi, scaler_multi,
                label_encoder, extract_comprehensive_features
            )

            # BUGFIX: test_single_audio_file returns (None, None, None) on
            # failure; previously that was reported as "INCORRECT: Ensemble
            # predicted None". Report the failure explicitly instead.
            if majority is None:
                print(f"βœ— SKIPPED: could not evaluate file for Note {note}")
            elif majority == note:
                print(f"βœ“ CORRECT: Ensemble predicted {majority}")
            else:
                print(f"βœ— INCORRECT: Ensemble predicted {majority}, expected {note}")

# Run the comprehensive test suite
if 'trained_models_multi' in locals() and len(trained_models_multi) > 0:
    print("Starting comprehensive model testing...")
    run_comprehensive_tests()
else:
    print("Models not available for testing. Please run the training cells first.")
create_microphone_interface() # Start real-time recognition\n" ] } ], "source": [ "# Real-time Microphone Note Recognition\n", "import threading\n", "import queue\n", "import sounddevice as sd\n", "from collections import deque\n", "import matplotlib.pyplot as plt\n", "from IPython.display import clear_output\n", "import ipywidgets as widgets\n", "from IPython.display import display\n", "\n", "class RealTimeNoteRecognizer:\n", " def __init__(self, models_dict, scaler, label_encoder, feature_extractor, sample_rate=22050):\n", " self.models_dict = models_dict\n", " self.scaler = scaler\n", " self.label_encoder = label_encoder\n", " self.feature_extractor = feature_extractor\n", " self.sample_rate = sample_rate\n", " self.chunk_duration = 2.0 # seconds\n", " self.chunk_samples = int(sample_rate * self.chunk_duration)\n", " self.audio_buffer = deque(maxlen=self.chunk_samples * 2)\n", " self.audio_queue = queue.Queue()\n", " self.is_recording = False\n", " self.recording_thread = None\n", " self.prediction_history = deque(maxlen=10)\n", " \n", " def audio_callback(self, indata, frames, time, status):\n", " \"\"\"Callback function for audio input\"\"\"\n", " if status:\n", " print(f\"Audio status: {status}\")\n", " # Add new audio data to queue\n", " self.audio_queue.put(indata.copy())\n", " \n", " def start_recording(self):\n", " \"\"\"Start real-time audio recording\"\"\"\n", " if self.is_recording:\n", " print(\"Already recording!\")\n", " return\n", " \n", " print(f\"Starting real-time note recognition...\")\n", " print(f\"Sample rate: {self.sample_rate} Hz\")\n", " print(f\"Chunk duration: {self.chunk_duration} seconds\")\n", " print(\"Speak or play musical notes near your microphone!\")\n", " \n", " self.is_recording = True\n", " self.audio_buffer.clear()\n", " \n", " # Start audio stream\n", " self.stream = sd.InputStream(\n", " callback=self.audio_callback,\n", " channels=1,\n", " samplerate=self.sample_rate,\n", " blocksize=1024\n", " )\n", " 
self.stream.start()\n", " \n", " # Start processing thread\n", " self.recording_thread = threading.Thread(target=self._process_audio)\n", " self.recording_thread.daemon = True\n", " self.recording_thread.start()\n", " \n", " def stop_recording(self):\n", " \"\"\"Stop real-time audio recording\"\"\"\n", " if not self.is_recording:\n", " print(\"Not currently recording!\")\n", " return\n", " \n", " print(\"Stopping recording...\")\n", " self.is_recording = False\n", " \n", " if hasattr(self, 'stream'):\n", " self.stream.stop()\n", " self.stream.close()\n", " \n", " if self.recording_thread:\n", " self.recording_thread.join(timeout=1.0)\n", " \n", " print(\"Recording stopped.\")\n", " \n", " def _process_audio(self):\n", " \"\"\"Process audio data in real-time\"\"\"\n", " while self.is_recording:\n", " try:\n", " # Get audio data from queue\n", " while not self.audio_queue.empty():\n", " chunk = self.audio_queue.get_nowait()\n", " self.audio_buffer.extend(chunk.flatten())\n", " \n", " # Process if we have enough data\n", " if len(self.audio_buffer) >= self.chunk_samples:\n", " # Get latest chunk\n", " audio_chunk = np.array(list(self.audio_buffer)[-self.chunk_samples:])\n", " \n", " # Check if audio has sufficient energy\n", " if np.max(np.abs(audio_chunk)) > 0.01:\n", " self._predict_note(audio_chunk)\n", " \n", " # Small delay to prevent excessive CPU usage\n", " threading.Event().wait(0.1)\n", " \n", " except Exception as e:\n", " print(f\"Error in audio processing: {e}\")\n", " continue\n", " \n", " def _predict_note(self, audio_chunk):\n", " \"\"\"Predict musical note from audio chunk\"\"\"\n", " try:\n", " # Extract features\n", " features = self.feature_extractor(audio_chunk, self.sample_rate)\n", " features_scaled = self.scaler.transform(features.reshape(1, -1))\n", " \n", " # Get predictions from all models\n", " predictions = {}\n", " confidences = {}\n", " \n", " for name, model in self.models_dict.items():\n", " pred_encoded = 
model.predict(features_scaled)[0]\n", " pred_note = self.label_encoder.inverse_transform([pred_encoded])[0]\n", " predictions[name] = pred_note\n", " \n", " # Get confidence if available\n", " if hasattr(model, 'predict_proba'):\n", " proba = model.predict_proba(features_scaled)[0]\n", " confidences[name] = proba.max()\n", " else:\n", " confidences[name] = 1.0\n", " \n", " # Ensemble prediction (majority voting)\n", " from collections import Counter\n", " votes = list(predictions.values())\n", " vote_counts = Counter(votes)\n", " ensemble_prediction = vote_counts.most_common(1)[0][0]\n", " ensemble_confidence = vote_counts.most_common(1)[0][1] / len(votes)\n", " \n", " # Add to history\n", " self.prediction_history.append({\n", " 'note': ensemble_prediction,\n", " 'confidence': ensemble_confidence,\n", " 'individual_predictions': predictions,\n", " 'individual_confidences': confidences,\n", " 'timestamp': time.time()\n", " })\n", " \n", " # Display prediction\n", " self._display_prediction(ensemble_prediction, ensemble_confidence, predictions, confidences)\n", " \n", " except Exception as e:\n", " print(f\"Prediction error: {e}\")\n", " \n", " def _display_prediction(self, ensemble_note, ensemble_conf, individual_preds, individual_confs):\n", " \"\"\"Display the current prediction\"\"\"\n", " clear_output(wait=True)\n", " \n", " print(\"🎡 REAL-TIME NOTE RECOGNITION 🎡\")\n", " print(\"=\" * 50)\n", " print(f\"🎯 ENSEMBLE PREDICTION: {ensemble_note} (confidence: {ensemble_conf:.1%})\")\n", " print(\"-\" * 50)\n", " \n", " print(\"Individual Model Predictions:\")\n", " for name, pred in individual_preds.items():\n", " conf = individual_confs[name]\n", " status = \"βœ“\" if pred == ensemble_note else \" \"\n", " print(f\" {status} {name:<25}: {pred} ({conf:.3f})\")\n", " \n", " # Show recent history\n", " if len(self.prediction_history) > 1:\n", " print(f\"\\nRecent Predictions:\")\n", " recent = list(self.prediction_history)[-5:] # Last 5 predictions\n", " for i, pred 
# Create interactive interface
def create_microphone_interface():
    """Create and display the interactive real-time microphone recognition UI.

    Requires the training cells to have been run first so that
    ``trained_models_multi``, ``scaler_multi``, ``label_encoder`` and
    ``extract_comprehensive_features`` exist at module level.

    Returns:
        The ``RealTimeNoteRecognizer`` wired to the START/STOP buttons,
        or ``None`` if the trained models are not available.
    """
    # FIX: inside a function, locals() can never contain module-level names,
    # so the original `not in locals() and not in globals()` test reduced to
    # the globals() check anyway — keep only the meaningful half.
    if 'trained_models_multi' not in globals():
        print("❌ Models not found! Please run the training cells first.")
        return

    # Recognizer wraps the trained ensemble + the feature pipeline.
    recognizer = RealTimeNoteRecognizer(
        trained_models_multi,
        scaler_multi,
        label_encoder,
        extract_comprehensive_features
    )

    start_button = widgets.Button(
        description='🎤 START RECORDING',
        button_style='success',
        layout=widgets.Layout(width='200px')
    )
    stop_button = widgets.Button(
        description='⏹️ STOP RECORDING',
        button_style='danger',
        layout=widgets.Layout(width='200px')
    )
    status_label = widgets.HTML(value="Status: Ready to start")

    def on_start_clicked(b):
        # Only flip button states after recording actually starts.
        try:
            recognizer.start_recording()
            status_label.value = "Status: 🔴 Recording... (listening for notes)"
            start_button.disabled = True
            stop_button.disabled = False
        except Exception as e:
            status_label.value = f"Status: ❌ Error: {e}"

    def on_stop_clicked(b):
        try:
            recognizer.stop_recording()
            status_label.value = "Status: ⏸️ Stopped"
            start_button.disabled = False
            stop_button.disabled = True
            # NOTE(review): clear_output also wipes the widget interface
            # itself; kept for behavior parity with the original cell.
            clear_output(wait=True)
            print("Microphone recording stopped.")
        except Exception as e:
            status_label.value = f"Status: ❌ Error: {e}"

    start_button.on_click(on_start_clicked)
    stop_button.on_click(on_stop_clicked)

    # Initial button states: nothing to stop yet.
    stop_button.disabled = True

    button_box = widgets.HBox([start_button, stop_button])
    interface = widgets.VBox([
        widgets.HTML("<h2>🎵 Real-Time Musical Note Recognition</h2>"),
        widgets.HTML("<p>Click START to begin real-time note recognition from your microphone.</p>"),
        status_label,
        button_box
    ])

    display(interface)

    return recognizer


# Test microphone availability
def test_microphone():
    """Check that an input device exists, record 2 seconds, and play it back.

    Returns:
        bool: True when audio above the noise floor was captured,
        False on silence or on any capture error.
    """
    print("Testing microphone availability...")

    try:
        # List available audio devices so the user can spot routing problems.
        print("\nAvailable audio devices:")
        print(sd.query_devices())

        print(f"\nDefault input device: {sd.query_devices(kind='input')}")

        print("\nTesting microphone (recording 2 seconds)...")
        duration = 2.0
        sample_rate = 22050  # matches the rate used elsewhere in the notebook

        recording = sd.rec(
            int(duration * sample_rate),
            samplerate=sample_rate,
            channels=1
        )
        sd.wait()  # block until the capture buffer is full

        max_amplitude = np.max(np.abs(recording))
        print(f"Max amplitude recorded: {max_amplitude:.4f}")

        if max_amplitude > 0.001:  # rough silence threshold
            print("✅ Microphone is working!")

            print("Playing back your recording...")
            display(ipd.Audio(recording.flatten(), rate=sample_rate))

            return True
        else:
            print("⚠️ No sound detected. Check your microphone.")
            return False

    except Exception as e:
        print(f"❌ Microphone test failed: {e}")
        print("Make sure you have a microphone connected and permissions enabled.")
        return False


print("Microphone recognition system loaded!")
print("\nUsage:")
print("1. test_microphone()              # Test your microphone")
print("2. create_microphone_interface()  # Start real-time recognition")
microphone input\"\"\"\n", " packages = ['sounddevice', 'ipywidgets']\n", " \n", " for package in packages:\n", " try:\n", " __import__(package)\n", " print(f\"βœ… {package} already installed\")\n", " except ImportError:\n", " print(f\"Installing {package}...\")\n", " subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])\n", " print(f\"βœ… {package} installed successfully\")\n", "\n", "# Run installation\n", "install_audio_packages()\n", "\n", "# Enable widgets in Jupyter\n", "try:\n", " from IPython.display import Javascript\n", " display(Javascript(\"IPython.notebook.kernel.execute('jupyter nbextension enable --py widgetsnbextension')\"))\n", "except:\n", " pass" ] }, { "cell_type": "code", "execution_count": 15, "id": "8ca6ca14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing microphone availability...\n", "\n", "Available audio devices:\n", "> 0 WH-1000XM4, Core Audio (1 in, 0 out)\n", "< 1 WH-1000XM4, Core Audio (0 in, 2 out)\n", " 2 MacBook Air Microphone, Core Audio (1 in, 0 out)\n", " 3 MacBook Air Speakers, Core Audio (0 in, 2 out)\n", " 4 Microsoft Teams Audio, Core Audio (2 in, 2 out)\n", "\n", "Default input device: {'name': 'WH-1000XM4', 'index': 0, 'hostapi': 0, 'max_input_channels': 1, 'max_output_channels': 0, 'default_low_input_latency': 0.014, 'default_low_output_latency': 0.01, 'default_high_input_latency': 0.03, 'default_high_output_latency': 0.1, 'default_samplerate': 16000.0}\n", "\n", "Testing microphone (recording 2 seconds)...\n", "Max amplitude recorded: 0.2706\n", "βœ… Microphone is working!\n", "Playing back your recording...\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_microphone()" ] }, { "cell_type": "code", "execution_count": 18, "id": "af1ce1fe", 
"metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0f8983975ce044ce9b01091077af7c17", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='

🎡 Real-Time Musical Note Recognition

'), HTML(value='

# Modified Real-time Microphone Note Recognition with 5-second timeout
def create_microphone_interface_5sec():
    """Interactive microphone recognition UI that auto-stops after 5 seconds.

    Relies on module-level ``trained_models_multi``, ``scaler_multi``,
    ``label_encoder`` and ``extract_comprehensive_features`` produced by the
    training cells.

    Returns:
        The ``RealTimeNoteRecognizer`` wired to the buttons, or ``None``
        when the trained models are missing.
    """
    # FIX: locals() inside a function never contains module-level names;
    # only the globals() probe was meaningful.
    if 'trained_models_multi' not in globals():
        print("❌ Models not found! Please run the training cells first.")
        return

    recognizer = RealTimeNoteRecognizer(
        trained_models_multi,
        scaler_multi,
        label_encoder,
        extract_comprehensive_features
    )

    start_button = widgets.Button(
        description='🎤 START 5-SEC RECORDING',
        button_style='success',
        layout=widgets.Layout(width='250px')
    )
    stop_button = widgets.Button(
        description='⏹️ STOP RECORDING',
        button_style='danger',
        layout=widgets.Layout(width='200px')
    )
    status_label = widgets.HTML(value="Status: Ready to start 5-second recording")
    countdown_label = widgets.HTML(value="")

    timer_thread = None  # countdown thread; (re)created on each START click

    def countdown_timer():
        """Count down 5 s in the UI, then auto-stop the recognizer."""
        for i in range(5, 0, -1):
            if not recognizer.is_recording:
                break  # user pressed STOP early
            countdown_label.value = f"Time remaining: {i} seconds"
            time.sleep(1)

        # Auto-stop after 5 seconds (unless already stopped manually).
        if recognizer.is_recording:
            recognizer.stop_recording()
            status_label.value = "Status: ⏸️ Auto-stopped after 5 seconds"
            countdown_label.value = "Recording completed!"
            start_button.disabled = False
            stop_button.disabled = True

            # Summarize what was heard during the recording window.
            print("\n🎯 5-SECOND RECORDING COMPLETED!")
            print("=" * 50)
            if recognizer.prediction_history:
                recent_preds = list(recognizer.prediction_history)[-3:]
                print("Final predictions from last 3 chunks:")
                for idx, pred in enumerate(recent_preds, 1):
                    print(f"  {idx}. {pred['note']} (confidence: {pred['confidence']:.1%})")

                from collections import Counter
                all_notes = [p['note'] for p in recognizer.prediction_history]
                note_counts = Counter(all_notes)
                most_common = note_counts.most_common(1)[0]
                print(f"\n🏆 MOST DETECTED NOTE: {most_common[0]} ({most_common[1]} times)")

    def on_start_clicked(b):
        # FIX: the original assigned `timer_thread` without `nonlocal`,
        # creating a shadowing local and leaving the outer handle None.
        nonlocal timer_thread
        try:
            recognizer.start_recording()
            status_label.value = "Status: 🔴 Recording for 5 seconds..."
            countdown_label.value = "Time remaining: 5 seconds"
            start_button.disabled = True
            stop_button.disabled = False

            # Run the countdown off the UI thread so the interface stays live.
            timer_thread = threading.Thread(target=countdown_timer, daemon=True)
            timer_thread.start()

        except Exception as e:
            status_label.value = f"Status: ❌ Error: {e}"

    def on_stop_clicked(b):
        try:
            recognizer.stop_recording()
            status_label.value = "Status: ⏸️ Manually stopped"
            countdown_label.value = "Recording stopped early"
            start_button.disabled = False
            stop_button.disabled = True
            clear_output(wait=True)
            print("Microphone recording stopped manually.")
        except Exception as e:
            status_label.value = f"Status: ❌ Error: {e}"

    start_button.on_click(on_start_clicked)
    stop_button.on_click(on_stop_clicked)

    # Initial button states: nothing running yet.
    stop_button.disabled = True

    button_box = widgets.HBox([start_button, stop_button])
    interface = widgets.VBox([
        widgets.HTML("<h2>🎵 5-Second Musical Note Recognition</h2>"),
        widgets.HTML("<p>Click START to begin 5-second note recognition from your microphone.</p>"),
        status_label,
        countdown_label,
        button_box
    ])

    display(interface)

    return recognizer


# Fixed 5-second recording function with better error handling
import sounddevice as sd
import numpy as np
from collections import Counter


def record_5_seconds_fixed():
    """Record 5 s with explicit precondition checks, then classify the note.

    Returns:
        tuple: ``(majority_note, per_model_predictions)`` on success,
        ``(None, None)`` on any failure.
    """
    print("🎤 Starting 5-second musical note recording...")
    print("Make sure your microphone is connected and working!")

    try:
        # Preconditions: all artifacts from the training cells must exist.
        if 'trained_models_multi' not in globals():
            print("❌ Error: Models not trained yet. Please run the training cells first.")
            return None, None
        if 'scaler_multi' not in globals():
            print("❌ Error: Scaler not available. Please run the training cells first.")
            return None, None
        if 'label_encoder' not in globals():
            print("❌ Error: Label encoder not available. Please run the training cells first.")
            return None, None

        duration = 5.0
        sample_rate = 22050  # same rate the feature extractor expects

        print(f"🔴 Recording for {duration} seconds...")
        print("Start making sounds now!")

        # sd.rec is non-blocking: capture proceeds while we count down.
        recording = sd.rec(
            int(duration * sample_rate),
            samplerate=sample_rate,
            channels=1,
            dtype='float64'
        )

        # Countdown for the user. (`time` is imported at the top of the
        # notebook — the redundant function-scope import was removed.)
        for i in range(5, 0, -1):
            print(f"⏰ {i}...")
            time.sleep(1)

        sd.wait()  # ensure the full buffer has been filled
        print("✅ Recording complete!")

        max_amplitude = np.max(np.abs(recording))
        print(f"Max amplitude recorded: {max_amplitude:.4f}")

        if max_amplitude < 0.001:
            print("⚠️ Very low audio detected. Try speaking louder or check your microphone.")

        print("\n🔊 Playing back your 5-second recording:")
        display(ipd.Audio(recording.flatten(), rate=sample_rate))

        print("\n🔍 Analyzing recording...")
        audio_data = recording.flatten()

        try:
            features = extract_comprehensive_features(audio_data, sample_rate)
            features_scaled = scaler_multi.transform(features.reshape(1, -1))
        except Exception as e:
            print(f"❌ Feature extraction failed: {e}")
            return None, None

        print("\n🎯 Model Predictions:")
        print("-" * 40)

        predictions = {}
        try:
            for name, model in trained_models_multi.items():
                pred_encoded = model.predict(features_scaled)[0]
                pred_note = label_encoder.inverse_transform([pred_encoded])[0]
                predictions[name] = pred_note

                if hasattr(model, 'predict_proba'):
                    confidence = model.predict_proba(features_scaled)[0].max()
                    print(f"{name:<25}: {pred_note} ({confidence:.3f})")
                else:
                    print(f"{name:<25}: {pred_note}")
        except Exception as e:
            print(f"❌ Prediction failed: {e}")
            return None, None

        # Ensemble result: simple majority vote across models.
        votes = list(predictions.values())
        vote_counts = Counter(votes)
        majority_vote, majority_count = vote_counts.most_common(1)[0]
        vote_confidence = majority_count / len(votes)

        print(f"\n🏆 FINAL RESULT:")
        print(f"Detected Note: {majority_vote}")
        print(f"Confidence: {vote_confidence:.1%} ({majority_count}/{len(votes)} models)")
        print(f"Vote Distribution: {dict(vote_counts)}")

        return majority_vote, predictions

    except Exception as e:
        print(f"❌ Recording failed with error: {e}")
        print(f"Error type: {type(e).__name__}")

        # Targeted hints for the two most likely failure modes.
        if "sounddevice" in str(e).lower():
            print("💡 Microphone issue detected. Try:")
            print("   1. Check if your microphone is connected")
            print("   2. Grant microphone permissions to Jupyter")
            print("   3. Restart your kernel and try again")
        elif "trained_models_multi" in str(e):
            print("💡 Models not found. Please run all training cells first.")

        return None, None


# Test microphone first
def test_microphone_simple():
    """Two-second capture smoke test.

    Returns:
        bool: True when sound above the noise floor was recorded.
    """
    try:
        print("Testing microphone (2 seconds)...")
        duration = 2.0
        sample_rate = 22050

        recording = sd.rec(
            int(duration * sample_rate),
            samplerate=sample_rate,
            channels=1
        )
        sd.wait()

        max_amplitude = np.max(np.abs(recording))
        print(f"Max amplitude: {max_amplitude:.4f}")

        if max_amplitude > 0.001:
            print("✅ Microphone working!")
            return True
        else:
            print("⚠️ No sound detected")
            return False

    except Exception as e:
        print(f"❌ Microphone test failed: {e}")
        return False


print("Fixed recording functions loaded!")
print("\nUsage:")
print("1. test_microphone_simple()  # Test microphone first")
print("2. record_5_seconds_fixed()  # Record and analyze")
# Simple 5-second recording function that works
def record_5_seconds():
    """Record 5 seconds of audio and run the trained note classifiers on it.

    Returns:
        tuple: ``(majority_note, per_model_predictions)`` on success,
        ``(None, None)`` on any failure.
    """
    print("🎤 Starting 5-second musical note recording...")

    try:
        # Local imports so the cell fails with a clear message when the
        # audio stack is not installed (caught by ImportError below).
        import sounddevice as sd
        import numpy as np
        from collections import Counter

        # FIX: the original "checked" for the training artifacts by
        # evaluating bare names and catching NameError — dead-looking
        # statements that report only the first missing name. Probe
        # globals() explicitly and report everything that is missing.
        required = (
            'trained_models_multi',
            'scaler_multi',
            'label_encoder',
            'extract_comprehensive_features',
        )
        missing = [name for name in required if name not in globals()]
        if missing:
            print(f"❌ Missing required variable: {', '.join(missing)}")
            print("Please run all the training cells first!")
            return None, None

        # Recording parameters (rate matches the training features).
        duration = 5.0
        sample_rate = 22050

        print(f"🔴 Recording for {duration} seconds...")
        print("Start making sounds now!")

        # Non-blocking capture; the countdown below runs concurrently.
        recording = sd.rec(
            int(duration * sample_rate),
            samplerate=sample_rate,
            channels=1,
            dtype='float64'
        )

        for i in range(5, 0, -1):
            print(f"⏰ {i}...")
            time.sleep(1)

        sd.wait()  # make sure the buffer is complete
        print("✅ Recording complete!")

        audio_flat = recording.flatten()
        max_amplitude = np.max(np.abs(audio_flat))
        print(f"Max amplitude: {max_amplitude:.4f}")

        if max_amplitude < 0.001:
            print("⚠️ Very quiet audio. Try making louder sounds.")

        print("\n🔊 Playing back your recording:")
        display(ipd.Audio(audio_flat, rate=sample_rate))

        print("\n🔍 Analyzing audio...")

        try:
            features = extract_comprehensive_features(audio_flat, sample_rate)
            features_scaled = scaler_multi.transform(features.reshape(1, -1))

            print("\n🎯 Model Predictions:")
            print("-" * 40)

            predictions = {}
            for name, model in trained_models_multi.items():
                pred_encoded = model.predict(features_scaled)[0]
                pred_note = label_encoder.inverse_transform([pred_encoded])[0]
                predictions[name] = pred_note

                if hasattr(model, 'predict_proba'):
                    confidence = model.predict_proba(features_scaled)[0].max()
                    print(f"{name:<25}: {pred_note} ({confidence:.3f})")
                else:
                    print(f"{name:<25}: {pred_note}")

            # Ensemble: majority vote across all models.
            votes = list(predictions.values())
            vote_counts = Counter(votes)
            majority_vote, majority_count = vote_counts.most_common(1)[0]
            vote_confidence = majority_count / len(votes)

            print(f"\n🏆 FINAL RESULT:")
            print(f"Detected Note: {majority_vote}")
            print(f"Confidence: {vote_confidence:.1%}")
            print(f"Votes: {dict(vote_counts)}")

            return majority_vote, predictions

        except Exception as e:
            print(f"❌ Analysis failed: {e}")
            return None, None

    except ImportError as e:
        print(f"❌ Missing package: {e}")
        print("Run: !pip install sounddevice")
        return None, None
    except Exception as e:
        print(f"❌ Recording failed: {e}")
        print("Check your microphone permissions and connection.")
        return None, None


print("✅ Simple recording function created!")
print("Now try: record_5_seconds()")